import datetime
from calendar import monthrange
from dateutil.relativedelta import relativedelta
import numpy as np
import pandas as pd
from pivottablejs import pivot_ui
from IPython.core.display import HTML
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
pio.templates.default = 'simple_white'
## Jupyter widgets + display
from ipywidgets import interact, interactive, fixed, interact_manual,Layout
import ipywidgets as widgets
from IPython.core.display import HTML
from IPython.display import Image
import ipyplot
base_price = pd.read_excel('Cloud invoicing.xlsx', sheet_name='Cluster prices')
log_activity = pd.read_excel('Cloud invoicing.xlsx', sheet_name='System output')
log_activity = log_activity.sort_values(by=['Customer', 'Transactions date'])
Following assumption is made:
What can be extracted from the pivot: Per month per client
first_date = log_activity['Transactions date'].min().replace(day=1)
last_date = max(log_activity['Transactions date'].max().replace(day=1),datetime.date.today())
list_billing_months = pd.date_range(first_date, last_date, freq = 'MS').tolist()
list_customers = log_activity['Customer'].unique()
all_customer_df = pd.DataFrame()
for c in list_customers:
end_of_month = pd.Period(last_date,freq='M').end_time
idx = pd.date_range(first_date,end_of_month)
df = pd.DataFrame(log_activity[log_activity['Customer'] == c][['Transactions date','Cluster #1', 'Cluster #2','Cluster #3','Cluster #4']])
df = df.set_index('Transactions date')
df = df.reindex(idx, method='ffill')
df = df.fillna(0)
df['Customer'] = c
all_customer_df = pd.concat([all_customer_df, df])
all_customer_df.index = pd.to_datetime(all_customer_df.index)
billing_df = pd.DataFrame()
for m in list_billing_months:
end_of_month = pd.Period(m,freq='M').end_time
num_days = monthrange(m.year, m.month)[1]
x = all_customer_df.loc[(all_customer_df.index >= m) & (all_customer_df.index <= end_of_month)]
x1 = x.groupby(['Customer']).sum()
x1['billing_date'] = end_of_month.date()
x1['day_in_month'] = num_days
billing_df = pd.concat([x1, billing_df])
billing_df['Cluster #1 cost'] = billing_df['Cluster #1']/billing_df['day_in_month'] * base_price[base_price['Cluster']=="Cluster #1"]['Cluster price / month'].values[0]
billing_df['Cluster #2 cost'] = billing_df['Cluster #2']/billing_df['day_in_month'] * base_price[base_price['Cluster']=="Cluster #2"]['Cluster price / month'].values[0]
billing_df['Cluster #3 cost'] = billing_df['Cluster #3']/billing_df['day_in_month'] * base_price[base_price['Cluster']=="Cluster #3"]['Cluster price / month'].values[0]
billing_df['Cluster #4 cost'] = billing_df['Cluster #4']/billing_df['day_in_month'] * base_price[base_price['Cluster']=="Cluster #4"]['Cluster price / month'].values[0]
billing_df['total_billing_amount'] = billing_df['Cluster #1 cost'] + billing_df['Cluster #2 cost'] + billing_df['Cluster #3 cost'] + billing_df['Cluster #4 cost']
billing_df = billing_df.rename(columns={"Cluster #1": "Cluster #1 usage in days",
"Cluster #2": "Cluster #2 usage in days",
"Cluster #3": "Cluster #3 usage in days",
"Cluster #4": "Cluster #4 usage in days",})
final = billing_df.drop('day_in_month', axis=1)
final = final.round({'Cluster #1 cost': 2,
'Cluster #2 cost': 2,
'Cluster #3 cost': 2,
'Cluster #4 cost': 2,
'billing_amount': 2})
final = final.sort_values(by=['billing_date', 'Customer'])
pivot_ui(final,outfile_path='invoice_pivot.html')
#HTML('invoice_pivot.html')
For YoY comparison, we can use monthly , quarterly, yearly or any period that makes sense for the business. In addition, depending on the purpose of the report, we also can have the YoY per client or per client's industry/ sector, etc.
In this challenge, as a proof of concept I will chose monthly and yearly to illustrate how the interactive chart would look like. There is no addition information regarding the customer hence we will have the YoY per client.
In each chart you can zoom in/out, select/de-select the customer using the label on the right side.
yoy_df = final.copy()
yoy_df = yoy_df.reset_index().set_index('billing_date')
yoy_df.index = pd.to_datetime(yoy_df.index)
monthly_df = yoy_df.copy()
monthly_df['change'] = monthly_df.groupby([monthly_df.index.month,'Customer'])['total_billing_amount'].\
pct_change(fill_method='ffill')*100
monthly_df = monthly_df.reset_index()
fig = px.line(
monthly_df[monthly_df.change.notnull()],
x = 'billing_date',
y = 'change',
color = 'Customer',
title = "Monthly billing amount YoY changes in percentage per customer",
text = 'change',
labels=dict(billing_date="Month", change="YoY change in monthly billing amount (%)")
)
fig.update_traces(texttemplate='%{text:.3s}%')
fig.show(renderer="notebook")
pre_year = yoy_df.copy()
pre_year = pd.DataFrame(pre_year.groupby([pre_year.index.year, 'Customer'])['total_billing_amount'].sum())
pre_year['change']=pre_year.groupby(['Customer'])['total_billing_amount'].pct_change()*100
pre_year = pre_year.reset_index()
fig = px.line(
pre_year[pre_year['change'].notnull()],
x = 'billing_date',
y = 'change',
color = 'Customer',
category_orders={'billing_date':list(pre_year['billing_date'])},
title = "Monthly billing amount YoY changes in percentage per customer",
text = 'change',
labels=dict(billing_date="Year", change="YoY hange in yearly billing amount (%)")
)
fig.update_traces(texttemplate='%{text:.3s}%')
fig.update_xaxes(type='category')
fig.show(renderer="notebook")
HTML('''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show()
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Show code"></form>''')